## ----setseedch4, echo = FALSE--------------------------------------------
set.seed(1982)

knitr::opts_chunk$set(
                      echo = FALSE,
                      warning = FALSE,
                      message = FALSE
)


## ----loadlibsch4---------------------------------------------------------
require(knitr)
library(stargazer)
library(effects)
library(pscl)
library(MASS)
library(ordinal)
library(tidyverse)
library(gridExtra)
library(pander)
library(arm)
library(rstanarm)
library(broom)
library(Amelia)
source("common_funcs.R")


## ----loaddatach4---------------------------------------------------------
load("ch2_data.RData")


## ----varcreation---------------------------------------------------------
sheet1$GovtIsAppellant <- (sheet1$APPTYPE == "E" & !grepl("14",sheet1$APPLOCN))
sheet1$GovtIsRespondent <- (sheet1$RESPTYPE == "E" & !grepl("14",sheet1$RESPLOC))
sheet1$Govt <-  (sheet1$GovtIsAppellant | sheet1$GovtIsRespondent)

sheet1$Govt <- as.numeric(sheet1$Govt)
### NAs for Govt are zeros -- NAs only exist b/c of some family law cases
sheet1$Govt[is.na(sheet1$Govt)] <- 0
sheet1$GovtIsAppellant[is.na(sheet1$GovtIsAppellant)] <- 0
sheet1$GovtIsRespondent[is.na(sheet1$GovtIsRespondent)] <- 0
sheet1$HR <- as.numeric(sheet1$HR=="Yes")
sheet1$DEVOLUTION <- as.numeric(sheet1$DEVOLUTION=="Yes")
sheet1$Area <- factor(sheet1$Area)
sheet1$Area <- relevel(sheet1$Area, "Public")

### Try and ordinary ordered probit
### Get workload six weeks before hearing date
sheet1$matchDate <- sheet1$HEARINGDATE1 - 42
sheet1 <- merge(sheet1, combWorkload,
	by.x = "matchDate",
	by.y = "Date",
	all.x = T,
	all.y = F)

### What about the early cases?
sheet1$workloadTotal[is.na(sheet1$workloadTotal)] <- mean(sheet1$workloadTotal, na.rm = T)

### Fix
### sheet1$nJudges
sheet1$nJudges <- factor(paste0(as.character(sheet1$nJudges)," judges"), 
                         levels = c("3 judges",
                                    "5 judges",
                                    "7 judges",
                                    "9 judges",
                                    "11 judges"),
	ordered = TRUE)

sheet1$Noncompliance <- 0.5
sheet1$Noncompliance[which(sheet1$GovtIsRespondent==1)] <- sheet1$OpinionBelow[which(sheet1$GovtIsRespondent==1)]
sheet1$Noncompliance[which(sheet1$GovtIsAppellant==1)] <- sheet1$OpinionBelowReversed[which(sheet1$GovtIsAppellant==1)]


## ----propplot, fig = TRUE, fig.cap = "Proportion of cases heard in panels of different sizes. "----
plot.df <- sheet1 %>%
    distinct(NEUTRAL, nJudges) %>%
    group_by(nJudges) %>% 
    summarize(nCases = n()) %>%
    mutate(prop = nCases / sum(nCases)) %>%
    filter(!is.na(nJudges)) 

p1 <- ggplot(plot.df, aes(x = nJudges, y = prop)) +
    scale_x_discrete("Number of judges") +
    scale_y_continuous("Proportion of cases",
                       labels = scales::percent) + 
    geom_bar(stat = "identity") +
    theme_uksc()

print(p1)



## ----cjt, echo = FALSE, fig = TRUE, results = "hide", fig.width = 6, fig.height = 8, fig.cap = "Committee size and correct decisions"----
cjt <- function(p, n) {
	if (n %% 2 ==0) {
		stop("Committee size must be odd")
	}
	h = (n+1)/2
	h = h:n
	res <- sum(choose(n, h) * p^h * (1-p)^(n-h))
	return(res)
}

### Following colours: brewer.pal("Dark2")
mycol <- c("#1b9e77","#d95f02","#7570b3") 

plot.df <- expand.grid(panels = c(3, 5, 7, 9, 11),
                       p = c(0.55, 0.66, 0.9))
plot.df$group <- apply(plot.df, 1, function(x) {
    cjt(n = x["panels"], p = x["p"])
})
plot.df$label <- paste0("Individual\nprobability: ", plot.df$p)

p1 <- ggplot(plot.df, aes(x = panels, y = group,
                    color = factor(p),
                    linetype = factor(p),
                    shape = factor(p))) +
    geom_point(size = 4) +
    geom_line() +
    scale_y_continuous("Probability of the group\nreaching a correct decision",
                       labels = scales::percent) +
    scale_x_continuous("Panel size",
                       breaks = c(3, 5, 7, 9, 11),
                       limits = c(2, 14)) +
    geom_text(data = subset(plot.df, panels == 11),
              aes(label = label),
              adj = 0,
              size = 3,
              color = "black",
              nudge_x = 0.25) + 
    theme_uksc() +
    labs(title = "Probabilities of reaching correct decision") + 
    theme(legend.position = "none") 

plot.df <- plot.df %>%
    group_by(p) %>%
    mutate(change = group - lag(group))

p2 <- ggplot(subset(plot.df, !is.na(change)),
             aes(x = panels,
                 y = change,
                 colour = factor(p),
                 linetype = factor(p),
                 shape = factor(p))) +
    geom_point(size = 4) +
    geom_line() +
    scale_y_continuous("Change in the probability of \na correct decision",
                       labels = scales::percent) +
    scale_x_continuous("Change in panel size",
                       breaks = c(5, 7, 9, 11),
                       labels = c("3->5", "5->7", "7->9", "9->11"),
                       limits = c(4.5, 14)) +
    geom_text(data = subset(plot.df, panels == 11),
              aes(label = label),
              adj = 0,
              size = 3,
              color = "black",
              nudge_x = 0.25) + 
    theme_uksc() +
    labs(title = "Changes in probabilities") + 
    theme(legend.position = "none")


grid.arrange(p1, p2, ncol=1)



## ----importancebysize, fig = TRUE, fig.cap = "Panel size by case importance. Figure excludes panels of three (2 cases) or eleven (1 case). "----

plot.df <- sheet1 %>%
    filter(!is.na(nJudges)) %>%
    filter(nJudges %in% c("5 judges", "7 judges", "9 judges")) %>%
    group_by(Importance) %>%
    mutate(label = paste0(Importance, "\n(", n(), " cases)")) %>% 
    group_by(Importance, nJudges) %>%
    summarize(label = unique(label), nCases = n()) %>%
    group_by(Importance) %>%
    mutate(Prop = nCases / sum(nCases))

ggplot(plot.df, aes(x = label, y = nCases, fill = nJudges)) +
    geom_bar(stat = "identity", position = position_dodge()) +
    scale_x_discrete("Case importance") +
    scale_y_continuous("Number of cases") +
    scale_fill_discrete("Size of panel") + 
    theme_uksc() +
    theme(legend.position = "bottom")



## ----rescalevars, results = "hide"---------------------------------------
keep.vars <- c("nJudges",
               "Importance", "OpinionBelow",
               "Area",
               "workloadTotal", "Noncompliance",
               "Govt", "HR")
tmp <- amelia(sheet1[, keep.vars], m = 1,
              boot.type = "none",
              ords = "nJudges",
              noms = "Area")

sheet1.bak <- sheet1
sheet1 <- tmp$imputations[[1]]

vars <- c("Importance", "OpinionBelow",
          "workloadTotal", "Noncompliance")
means_std_vars <- numeric()
sd_std_vars <- numeric()
for (v in vars) {
    means_std_vars <- c(means_std_vars,
                        mean(sheet1[,v], na.rm = TRUE))
    sd_std_vars <- c(sd_std_vars,
                        sd(sheet1[,v], na.rm = TRUE))
    sheet1[,v] <- arm::rescale(sheet1[,v])
}

names(means_std_vars) <- names(sd_std_vars) <- vars

sheet1$Area <- relevel(sheet1$Area,
                      "Civil")


## ----legalmod, results = "hide", fig = TRUE, fig.cap = "Legal model of panel size decisions. Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 6, fig.height = 5----
## Figure height should be what, (nCoefs + 2) / 2.5?
legalmod <- polr(nJudges ~ Importance * OpinionBelow + Area + HR,
                 data = sheet1)

legalmod.rstanarm <- stan_polr(nJudges ~ Importance * OpinionBelow + Area + HR,
                               data = sheet1,
                               chains = 2, cores = 2, seed = 1982,
                               prior = R2(0.15))


mycoefplot(legalmod.rstanarm,
           ylab = "Coefficient value",
           sec.ylab = "Odds are ... times greater",
           variable.labels = list("(Intercept)" = "Intercept",
                                  "Importance" = "Importance",
                                  "OpinionBelow" = "Disagreement below",
                                  "HR" = "Human rights claim",
                                  "AreaN Ireland" = "N Irish case",
                                  "AreaScots" = "Scottish case",
                                  "AreaFamily" = "Family law case",
                                  "AreaPublic" = "Public law case",
                                  "AreaCriminal" = "Criminal law case",
                                  "AreaChancery" = "Tax and chancery case",
                                  "Importance:OpinionBelow" = "Importance by disagreement below"))




import <- tidy(legalmod.rstanarm) %>%
    filter(term == "Importance") %>%
    mutate(estimate = round(exp(estimate), 2)) %>% 
    pull(estimate)

disbelow <- tidy(legalmod.rstanarm) %>%
    filter(term == "OpinionBelow") %>%
    mutate(estimate = round(exp(estimate), 2)) %>% 
    pull(estimate)



## ----orgmod, results = "hide", fig = TRUE, fig.cap = "Organisational model of panel size decisions. Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. Coefficients for Importance and Workload have been standardised. ", fig.width = 6, fig.height = 2----
## Figure height should be what, (nCoefs + 2) / 2.5?
orgmod <- polr(nJudges ~ Importance + workloadTotal,
               data = sheet1)

orgmod.rstanarm <- stan_polr(formula(orgmod),
                             data = sheet1,
                             chains = 2, cores = 2, seed = 1982, 
                             prior = R2(0.1, what = "median"))

mycoefplot(orgmod.rstanarm,
           ylab = "Coefficient value",
           sec.ylab = "Odds are ... times greater",
           variable.labels = list("(Intercept)" = "Intercept",
                                  "Importance" = "Importance",
                                  "workloadTotal" = "Court workload"))


wl <- tidy(orgmod.rstanarm) %>%
    filter(term == "workloadTotal") %>%
    mutate(estimate = round(exp(estimate), 2)) %>%
    pull(estimate)


## ----polmod, results = "hide", fig = TRUE, fig.cap = "Political model of panel size decisions. Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 6, fig.height = 2----
## Figure height should be what, (nCoefs + 2) / 2.5?
polmod <- polr(nJudges ~ Govt + Noncompliance + HR,
               data = sheet1)

polmod.rstanarm <- stan_polr(formula(polmod),
                             data = sheet1,
                             chains = 2, cores = 2, seed = 1982, 
                             prior = R2(0.1))

mycoefplot(polmod.rstanarm,
           ylab = "Coefficient value",
           sec.ylab = "Odds are ... times greater",
           variable.labels = list("(Intercept)" = "Intercept",
                                  "Prob" = "Opinion below",
                                  "Govt" = "Government involved",
                                  "Noncompliance" = "Risk of non-compliance",
                                  "HR" = "Human rights issue"))

hr1 <- tidy(polmod.rstanarm) %>%
    filter(term == "HR") %>%
    mutate(estimate = round(exp(estimate), 2)) %>%
    pull(estimate)


## ----combmodch4, results = "hide", fig = TRUE, fig.cap = "Combined model of panel size decisions. Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 6, fig.height = 10, out.width = "100%"----
## Figure height should be what, (nCoefs + 2) / 2.5?
combmod <- polr(nJudges ~ Importance * OpinionBelow +
                    workloadTotal +                     
                    Govt + Noncompliance + HR + Area,
                data = sheet1)

combmod.alt <- polr(nJudges ~ Importance * poly(OpinionBelow, 2) +
                        workloadTotal +                     
                        Govt + Noncompliance + HR + Area,
                    data = sheet1)

combmod.rstanarm <- stan_polr(formula(combmod),
                              data = sheet1,
                              chains = 2, cores = 2, seed = 1982, 
                              prior = R2(0.1))

combmod.alt.rstanarm <- stan_polr(formula(combmod.alt),
                                  data = sheet1,
                                  chains = 2, cores = 2, seed = 1982, 
                                  prior = R2(0.1))

mycoefplot(combmod.rstanarm,
           ylab = "Coefficient value",
           sec.ylab = "Odds are ... times greater",
           variable.labels = list("(Intercept)" = "Intercept",
                                  "OpinionBelow" = "Disagreement below",
                                  "Govt" = "Government involved",
                                  "HR" = "Human rights claim",
                                  "Importance:OpinionBelow" = "Importance by disagreement below",
                                  "workloadTotal" = "Court workload",
                                  "Noncompliance" = "Risk of non-compliance",
                                  "AreaN Ireland" = "N Irish case",
                                  "AreaScots" = "Scottish case",
                                  "AreaFamily" = "Family law case",
                                  "AreaPublic" = "Public law case",
                                  "AreaCriminal" = "Criminal law case",
                                  "AreaChancery" = "Tax and chancery case"))

workload1 <- tidy(combmod.rstanarm) %>%
    filter(term == "workloadTotal") %>%
    mutate(estimate = round(estimate, 2)) %>%
    pull(estimate)



## ----predsfromcombinedmodel----------------------------------------------
basecase <- data.frame(Importance = 0,
                       Area = "Civil",
                       OpinionBelow = 0,
                       workloadTotal = 0,
                       Noncompliance = 0,
                       Govt = 0,
                       nJudges = 5,
                       HR = 0)

basecase$Area <- factor(basecase$Area,
                        levels = levels(sheet1$Area))

prob_large_panel <- posterior_linpred(combmod.rstanarm, newdata = basecase)
sumdf <- as.data.frame(summary(combmod.rstanarm)) 

cp <- sumdf[grep("|",
                 rownames(sumdf),
                 fixed = TRUE),
            "mean"]
### Probability of panel of seven or greater
base_prob_large_panel  <- plogis(prob_large_panel - cp[2])
mean_base_prob <- round(100 * mean(base_prob_large_panel))



## ----scenarios-----------------------------------------------------------
### Scenario (1)
### workloadTotal +1 SD

scen1 <- basecase
scen1$workloadTotal <- scen1$workloadTotal +
    2 * sd(sheet1$workloadTotal, na.rm = TRUE)
prob_large_panel <- posterior_linpred(combmod.rstanarm, newdata = scen1)
### Probability of panel of seven or greater
scen1_prob_large_panel  <- plogis(prob_large_panel - cp[2])
scen1_delta <- scen1_prob_large_panel - base_prob_large_panel
scen1_delta <- data.frame(delta.bar = mean(scen1_delta),
                          delta.lo = quantile(scen1_delta, 0.025),
                          delta.hi = quantile(scen1_delta, 0.975),
                          delta.llo = quantile(scen1_delta, 1/12),
                          delta.hhi = quantile(scen1_delta, 11/12))

### Scenario (2)
### Human Rights issue
scen2 <- basecase
scen2$HR <- 1
prob_large_panel <- posterior_linpred(combmod.rstanarm, newdata = scen2)
### Probability of panel of seven or greater
scen2_prob_large_panel  <- plogis(prob_large_panel - cp[2])
scen2_delta <- scen2_prob_large_panel - base_prob_large_panel
scen2_delta <- data.frame(delta.bar = mean(scen2_delta),
                          delta.lo = quantile(scen2_delta, 0.025),
                          delta.hi = quantile(scen2_delta, 0.975),
                          delta.llo = quantile(scen2_delta, 1/12),
                          delta.hhi = quantile(scen2_delta, 11/12))

### Scenario (3)
### Govt involvement
scen3 <- basecase
scen3$Govt <- 1
prob_large_panel <- posterior_linpred(combmod.rstanarm, newdata = scen3)
### Probability of panel of seven or greater
scen3_prob_large_panel  <- plogis(prob_large_panel - cp[2])
scen3_delta <- scen3_prob_large_panel - base_prob_large_panel
scen3_delta <- data.frame(delta.bar = mean(scen3_delta),
                          delta.lo = quantile(scen3_delta, 0.025),
                          delta.hi = quantile(scen3_delta, 0.975),
                          delta.llo = quantile(scen3_delta, 1/12),
                          delta.hhi = quantile(scen3_delta, 11/12))

### Scenario (4)
### Risk of non-compliance
scen4 <- basecase
scen4$Govt <- 1
scen4$Noncompliance <- 1

prob_large_panel <- posterior_linpred(combmod.rstanarm, newdata = scen4)
### Probability of panel of seven or greater
scen4_prob_large_panel  <- plogis(prob_large_panel - cp[2])
scen4_delta <- scen4_prob_large_panel - scen3_prob_large_panel
scen4_delta <- data.frame(delta.bar = mean(scen4_delta),
                          delta.lo = quantile(scen4_delta, 0.025),
                          delta.hi = quantile(scen4_delta, 0.975),
                          delta.llo = quantile(scen4_delta, 1/12),
                          delta.hhi = quantile(scen4_delta, 11/12))



## ----interaxplot, fig = TRUE, fig.cap = "Probability of larger panels as a function of division in lower courts. The distribution of division on lower courts is shown as a rug on the horizontal axis; positions have been jittered slightly for ease of plotting. ", fig.width = 6, fig.height = 4----
newdata <- expand.grid(Importance = sort(unique(sheet1$Importance)),
                       OpinionBelow = unique(sheet1$OpinionBelow),
                       workloadTotal = mean(sheet1$workloadTotal),
                       Noncompliance = mean(sheet1$Noncompliance),
                       Govt = mean(sheet1$Govt),
                       Area = "Civil",
                       HR = mean(sheet1$HR))

newdata$Area <- factor(newdata$Area,
                        levels = levels(sheet1$Area))

newpos <- posterior_linpred(combmod.rstanarm, newdata = newdata)
newpos <- plogis(newpos - cp[2])

newdata$lp <- apply(newpos, 2, mean)
newdata$lp.lo <- apply(newpos, 2, quantile, probs = 0.025)
newdata$lp.hi <- apply(newpos, 2, quantile, probs = 1 - 0.025)

plot.df <- subset(newdata,
                  Importance %in% c(min(Importance), max(Importance))) %>%
    group_by(Importance) %>%
    arrange(OpinionBelow)

plot.df$label <- ifelse(plot.df$Importance == min(plot.df$Importance),
                        "Cases of the\nlowest\nimportance",
                 ifelse(plot.df$Importance == max(plot.df$Importance),
                        "Cases of the\nhighest\nimportance",
                        NA))

## Back-transform the OpinionBelow variable
plot.df$OpinionBelow <- plot.df$OpinionBelow *
    2 * sd_std_vars["OpinionBelow"] +
    means_std_vars["OpinionBelow"]

ggplot(plot.df,
       aes(x = OpinionBelow)) +
    geom_point(aes(y = lp,
                   color = factor(Importance),
                   shape = factor(Importance)),
               size = 2) + 
    geom_smooth(aes(y = lp,
                       ymax = lp.hi,
                       ymin = lp.lo,
                       color = factor(Importance)),
                   stat = "identity") +
    geom_rug(data = sheet1.bak,
                aes(x = OpinionBelow, y = 0),
                position = position_jitter(width = 0.01, height = 0),
             sides = "b", alpha = 0.5) +
    geom_text(data = subset(plot.df, OpinionBelow == min(OpinionBelow,
                                                 na.rm = TRUE)),
              aes(x = OpinionBelow, y = lp, label = label),
              hjust = 1,
              nudge_x = -0.025,
              size = 3 ) + 
    scale_x_continuous("Disagreement in courts below",
                       limits = c(0.05, 0.5)) +
    scale_y_continuous("Probability of a panel\nof 7 or more judges",
                       labels = scales::percent) +
    scale_shape_manual(values = my.pchs, guide = "none") + 
    scale_colour_discrete(guide = "none") +
    theme_uksc()



## ----marginalplot1, fig = TRUE, fig.cap = "The marginal effects of increasing importance (left panel) and increasing division below (right panel). Ribbons indicate credible intervals. Outer ribbon gives 95 percent credible intervals; inner ribbon gives 83 percent credible intervals. "----

### Evaluate this stuff at different levels of prob

holder <- list()
for (p in unique(sheet1$OpinionBelow)) {
    highimp <- lowimp <- basecase
    highimp$OpinionBelow <- lowimp$OpinionBelow <- p
    highimp$Importance <- 0.5
    lowimp$Importance <- 0
    p_highimp <- posterior_linpred(combmod.rstanarm, newdata = highimp)
    p_lowimp <- posterior_linpred(combmod.rstanarm, newdata = lowimp)
    p_highimp <- plogis(p_highimp - cp[3])
    p_lowimp <- plogis(p_lowimp - cp[3])
    retval <- data.frame(xbar = mean(p_highimp - p_lowimp),
                         xlo = quantile(p_highimp - p_lowimp, 0.025),
                         xhi = quantile(p_highimp - p_lowimp, 1 - 0.025),
                         xllo = quantile(p_highimp - p_lowimp, 1/12),
                         xhhi = quantile(p_highimp - p_lowimp, 11/12))
    retval$OpinionBelow <- p
    holder[[which(unique(sheet1$OpinionBelow) == p)]] <- retval
}

plot.df <- do.call("rbind", holder)
plot.df <- plot.df %>%
    arrange(OpinionBelow)

p1 <- ggplot(plot.df, aes(x = OpinionBelow, y = xbar)) +
    geom_smooth(stat = "identity", aes(ymin = xlo, ymax = xhi)) +
    geom_smooth(stat = "identity", aes(ymin = xllo, ymax = xhhi)) +
    geom_point() +
    geom_rug(data = sheet1,
                aes(x = OpinionBelow, y = 0),
                position = position_jitter(width = 0.01, height = 0),
             sides = "b", alpha = 0.5) +
    scale_x_continuous("Disagreement below") +
    scale_y_continuous("Marginal effect of moving from\nlowest to highest importance") +
    geom_hline(yintercept = 0) + 
    scale_colour_discrete(guide = "none") +
    theme_uksc()

holder <- list()
for (p in unique(sheet1$Importance)) {
    highdiv <- lowdiv <- basecase
    highdiv$Importance <- lowdiv$Importance <- p
    highdiv$OpinionBelow <- max(sheet1$OpinionBelow)
    lowdiv$OpinionBelow <- min(sheet1$OpinionBelow)
    p_highdiv <- posterior_linpred(combmod.rstanarm, newdata = highdiv)
    p_lowdiv <- posterior_linpred(combmod.rstanarm, newdata = lowdiv)
    p_highdiv <- plogis(p_highdiv - cp[3])
    p_lowdiv <- plogis(p_lowdiv - cp[3])
    retval <- data.frame(xbar = mean(p_highdiv - p_lowdiv),
                         xlo = quantile(p_highdiv - p_lowdiv, 0.025),
                         xhi = quantile(p_highdiv - p_lowdiv, 1 - 0.025),
                         xllo = quantile(p_highdiv - p_lowdiv, 1/12),
                         xhhi = quantile(p_highdiv - p_lowdiv, 11/12))
    retval$Importance <- p
    holder[[which(unique(sheet1$Importance) == p)]] <- retval
}

plot.df <- do.call("rbind", holder)
plot.df <- plot.df %>%
    arrange(Importance)

p2 <- ggplot(plot.df, aes(x = Importance, y = xbar)) +
    geom_smooth(stat = "identity", aes(ymin = xlo, ymax = xhi)) +
    geom_smooth(stat = "identity", aes(ymin = xllo, ymax = xhhi)) +
    geom_point() +
    geom_rug(data = sheet1,
                aes(x = Importance, y = 0),
                position = position_jitter(width = 0.01, height = 0),
             sides = "b", alpha = 0.5) +
    scale_x_continuous("Importance") +
    scale_y_continuous("Marginal effect of moving from\nlowest to highest\ndisagreement below") +
    geom_hline(yintercept = 0) + 
    scale_colour_discrete(guide = "none") +
    theme_uksc()

grid.arrange(p1, p2, ncol = 2)



## ----fitstatsch4, fig = TRUE, fig.cap = "Goodness-of-fit statistics for different models. Thin lines indicate 95 percent credible intervals; thick lines indicate 83 percent credible intervals. ", warning = FALSE----

epcp.rstanarm <- function(mod) {
    
    poslp <- posterior_linpred(mod)
    sumdf <- as.data.frame(summary(mod)) 
    cp <- sumdf[grep("|",
                     rownames(sumdf),
                     fixed = TRUE),
                "mean"]
    cp <- c(-Inf, cp, Inf)
    poslp2 <- apply(poslp, 2, cut,
                    breaks = cp,
                    labels = c("3 judges",
                               "5 judges",
                               "7 judges",
                               "9 judges",
                               "11 judges"))
    preds <- poslp2
    ## preds <- (poslp > 0.5)
    
    actual <- matrix(mod$y,
                     nrow = nrow(poslp2),
                     ncol = length(mod$y),
                     byrow = TRUE)
    correct <- (preds == actual)

    correct <- apply(correct, 1, mean)
    epcp <- data.frame(term = "PCP\n(bigger is better)",
                       mean = mean(correct),
                       lo = quantile(correct, 0.025),
                       hi = quantile(correct, 0.975),
                       llo = quantile(correct, 1/12),
                       hhi = quantile(correct, 11/12))

    the.loo <- loo(mod)
    looic <- the.loo$estimates["looic", 1]
    loose <- the.loo$estimates["looic", 2]
    the.loo <- data.frame(term = "LOOIC\n(smaller is better)",
                          mean = looic,
                          lo = looic + qnorm(1/40) * loose,
                          hi = looic + qnorm(39/40) * loose,
                          llo = looic + qnorm(1/24) * loose,
                          hhi = looic + qnorm(23/24) * loose)
    return(rbind(epcp, the.loo))
}

if (file.exists("cache/04_gof.RData")) {
    load("cache/04_gof.RData")
} else { 

    legal.stats <- epcp.rstanarm(legalmod.rstanarm)
    org.stats <- epcp.rstanarm(orgmod.rstanarm)
    pol.stats <- epcp.rstanarm(polmod.rstanarm)
    comb.stats <- epcp.rstanarm(combmod.rstanarm)
    
    legal.stats$Model <- "Legal"
    org.stats$Model <- "Organisational"
    pol.stats$Model <- "Political"
    comb.stats$Model <- "Combined"

    null.stats <- data.frame(Model = rep("Null", 2),
                             term = c("PCP\n(bigger is better)",
                                      "LOOIC\n(smaller is better)"),
                             mean = c(mean(legalmod.rstanarm$y == "5 judges",
                                           na.rm = TRUE),
                                      NA))
    
    gof.df <- rbind(
        rbind(
            rbind(legal.stats,
                  org.stats),
            pol.stats),
        comb.stats)
    
    gof.df <- merge(gof.df, null.stats, all = TRUE)

    my.levels <- gof.df %>%
        filter(grepl("LOOIC", term)) %>%
        arrange(mean) %>%
        pull(Model)
    
    gof.df$Model <- factor(gof.df$Model,
                       levels = rev(my.levels),
                       ordered = TRUE)

    legal.loo <- loo(legalmod.rstanarm)
    combined.loo <- loo(combmod.rstanarm)
    political.loo <- loo(polmod.rstanarm)
    org.loo <- loo(orgmod.rstanarm)
    
    save(list = c("legal.loo", "combined.loo",
                  "political.loo", "org.loo",
                  "gof.df"),
         file = "cache/04_gof.RData")
}

ggplot(gof.df, aes(x = Model, y = mean, ymax = hi, ymin = lo)) +
    geom_linerange(size = inner.bar.size,
                   colour = inner.bar.col,
                   alpha = 0.8) +
        geom_linerange(aes(ymin = llo, ymax = hhi),
                       size = outer.bar.size,
                       colour = outer.bar.col,
                       alpha = 0.8) +
    geom_point(aes(colour = outer.bar.col),
               size= (inner.bar.size + outer.bar.size) * 2.5,
               shape= my.pchs[1],
               stroke = 1.5) +
    scale_colour_identity() + 
    facet_wrap(~term, scales = "free_x") +
    scale_y_continuous("Value") +
    coord_flip() +
    theme_uksc()

legal.loo <- loo(legalmod.rstanarm)
combined.loo <- loo(combmod.rstanarm)
combined.alt.loo <- loo(combmod.alt.rstanarm)
## compare_models(combined.loo, loo(polmod.rstanarm))

## compare_models(loo(polmod.rstanarm), loo(polmod.alt.rstanarm))

## Difference between LRT and AIC for testing
## LRT testing whether a restriction is appropriate

ll2 <- log_lik(combmod.rstanarm)
ll2 <- rowSums(ll2)

ll1 <- log_lik(combmod.alt.rstanarm)
ll1 <- rowSums(ll1)

D <- 2 * (ll2 - ll1)
